*global CBP_input "/Users/oldbenjaminschoefer/Dropbox/TFP Measures/CBP"
*global CBP_output "/Users/oldbenjaminschoefer/Dropbox/TFP Measures/CBP"
global CBP_input "/Users/orenziv/Dropbox/Projects and Data/Local TFP/TFP Measures/CBP"
global CBP_output "/Users/orenziv/Dropbox/Projects and Data/Local TFP/TFP Measures/CBP"

*******Figure A1 here***********

*MSA list from https://www.census.gov/geographies/reference-files/time-series/demo/metro-micro/delineation-files.html
import excel "$CBP_input/list1.xls", sheet("List 1") clear
keep if E=="Metropolitan Statistical Area"
keep A E
destring A, gen(msa) force
*force b/c column header, table header still in A
drop A
drop if msa==.
keep msa
duplicates drop
tempfile temp1
save `temp1'


import delimited using "$CBP_input/cbp12msa.txt", clear
keep if substr(naics,-2,2)=="//"
gen naics4=substr(naics,1,4)
destring naics4, force replace
drop if naics4==.
keep if naics4>2999 & naics4<4000
merge m:1 msa using `temp1', keep(3) nogen
count
*381 msas present, 86 industries. 20,418 obs are filled, ie 37.7% of cells have zeros 
gen est_trunk=est
replace est_trunk=100 if est_trunk>=100
cdfplot est_trunk, ytitle("Fraction of Cells (Cond. on > 0)") xscale(log) xlabel(5 10 20 40 60 80 100) xtick(0(5)100, grid)   xtitle("Number of Plants (Log Scale)") graphregion(color(white)) bgcolor(white) ///
text(.05 2 "Fraction 0 = 0.38") 
graph export "$CBP_output/Fig_A1_tau_granularity.pdf", replace

collapse (sum) est n1* n2* n5* emp, by(msa)
gen est_trunk2=est
replace est_trunk2=5500 if est_trunk>5500
cdfplot est_trunk2, ytitle("Fraction of Cells") xscale(log) xlabel(25 50 100 200 500 1000 2000 5000) xtick(25 50 100 200 500(250)5500,grid) xtitle("Number of Plants (Log Scale)") graphregion(color(white)) bgcolor(white) 
graph export "$CBP_output/Fig_A1_xi_granularity.pdf", replace

	
	
	*
	
	
*******Figure A2 here***********


*MSA list from https://www.census.gov/geographies/reference-files/time-series/demo/metro-micro/delineation-files.html
import excel "$CBP_input/list1.xls", sheet("List 1") clear
keep if E=="Metropolitan Statistical Area"
keep A E
destring A, gen(msa) force
*force b/c column header, table header still in A
drop A
drop if msa==.
keep msa
duplicates drop
tempfile temp1
save `temp1'

import delimited using "$CBP_input/cbp12msa.txt", clear
keep if substr(naics,-2,2)=="//"
gen naics4=substr(naics,1,4)
destring naics4, force replace
drop if naics4==.
keep if naics4>2999 & naics4<4000
merge m:1 msa using `temp1', keep(3) nogen

*get per cell employment 
rename n1000_1 n1000_1499
rename n1000_2 n1500_2499
rename n1000_3 n2500_4999
rename n1000_4 n5000

tempfile temp3
save `temp3'

/*
* Choice of ratio for within-bin imputation (see figure note)
clear
tempfile loopresults
set obs 1
gen ratio=.
gen cons=.
gen slope=.
save `loopresults'
foreach num of numlist 1(1)100{
	quietly use `temp3', replace
	local ratio=`num'/100
	quietly cap gen emp_est=0
	quietly cap replace emp_est=0
	local btm=1
	foreach num in 5 10 20 50 100 250 500 1000 1500 2500 5000{
		local top=`num'-1
		local midpt`btm'_`top'=floor((`top'-`btm')*`ratio'+`btm')
		quietly cap drop emp_n`btm'_`top'
		quietly gen emp_n`btm'_`top' = `midpt`btm'_`top''*n`btm'_`top'
		quietly replace emp_est=emp_est+emp_n`btm'_`top'
		local btm=`num'
		}
	
		quietly cap drop emp_n5000 midpt5000 
		quietly gen emp_n5000=emp-emp_est if n5000!=0
		quietly replace emp_n5000=5000*n5000 if emp_n5000<5000*n5000 & n5000!=0
		quietly gen midpt5000=emp_n5000/n5000
		quietly replace midpt5000=0 if n5000==0
		quietly replace emp_est=emp_est+emp_n5000 if n5000!=0
		quietly sum emp_est emp if n5000==0 & emp!=0
		quietly sum emp_est emp if n5000!=0 & emp!=0

	quietly gen lemp=log(emp)
	quietly gen lemp_est=log(emp_est)
	quietly  reg lemp lemp_est if n5000==0 & emp!=0
	local cons = _b[_cons]
	local slope = _b[lemp_est]
	quietly clear
	quietly set obs 1
	quietly gen ratio=`ratio' 
	quietly gen cons=`cons' 
	quietly gen slope=`slope'
	quietly append using `loopresults'
	quietly save `loopresults', replace
	*di _b[_cons]
	}
*/	

	local ratio=.35

quietly cap gen emp_est=0
quietly cap replace emp_est=0
local btm=1
foreach num in 5 10 20 50 100 250 500 1000 1500 2500 5000{
	local top=`num'-1
	local midpt`btm'_`top'=floor((`top'-`btm')*`ratio'+`btm')
	quietly cap drop emp_n`btm'_`top'
	quietly gen emp_n`btm'_`top' = `midpt`btm'_`top''*n`btm'_`top'
	quietly replace emp_est=emp_est+emp_n`btm'_`top'
	local btm=`num'
	}
	
	quietly cap drop emp_n5000 midpt5000 
	quietly gen emp_n5000=emp-emp_est if n5000!=0
	quietly replace emp_n5000=5000*n5000 if emp_n5000<5000*n5000 & n5000!=0
	quietly gen midpt5000=emp_n5000/n5000
	quietly replace midpt5000=0 if n5000==0
	quietly replace emp_est=emp_est+emp_n5000 if n5000!=0
	quietly sum emp_est emp if n5000==0 & emp!=0
	quietly sum emp_est emp if n5000!=0 & emp!=0

quietly gen lemp=log(emp)
quietly gen lemp_est=log(emp_est)
reg lemp lemp_est if n5000==0 & emp!=0


*get per plant estimate for each cell
local btm=1
foreach num in 5 10 20 50 100 250 500 1000 1500 2500 5000{
	local top=`num'-1
	gen emp_per_estn`btm'_`top'=emp_n`btm'_`top'/n`btm'_`top'
	local btm=`num'
	}
keep naics4 msa n* emp_per_estn* emp emp_est
egen cbsanaics4=group(msa naics4)
rename naics ind
rename naics4 ind4
reshape long emp_per_estn n, i(cbsanaics4) j(size) string
drop emp emp_est
expand n
drop if emp_per_estn==.
keep ind4 msa cbsanaics4 emp_per_estn

bys msa: egen S_emp=total(emp_per_estn)
bys msa ind4: egen SJ_emp=total(emp_per_estn)
gen empshare_SJ=SJ_emp/S_emp

gen gini_S=.
levelsof msa, local(cbsalist)
ineqdeco emp_per_estn, by(msa)
foreach cbsa in `cbsalist'{
	replace gini_S=r(gini_`cbsa') if msa==`cbsa'
	}

	gen gini_SJ=.
levelsof ind4, local(indlist)
foreach ind in `indlist'{
	quietly ineqdeco emp_per_estn if ind4==`ind', by(msa)
	foreach cbsa in `cbsalist'{
		quietly replace gini_SJ=r(gini_`cbsa') if msa==`cbsa' & ind4==`ind'
		}
	}
sort msa ind	
egen tag_S=tag(msa)
egen tag_SJ=tag(msa ind4)


xtile groupcbsa=msa ,nquantiles(10)
 foreach group of numlist 1(1)10{
		quietly glcurve emp_per_estn if groupcbsa==`group', by(msa) split pvar(xvar_`group') glvar(yvar) lorenz nograph 
		}
egen xvar=rowtotal(xvar*)
egen yvar=rowtotal(yvar*)

tempvar temp1
gen `temp1'=tag_S+1
expand `temp1', gen(expandedvar)
replace tag_S=0 if expandedvar==1
replace xvar=0 if expandedvar==1
replace yvar=0 if expandedvar==1

_pctile gini_S if tag_S, p(5, 50, 95)
cap gen S_5=1 if gini_S==r(r1)
cap gen S_50=1 if gini_S==r(r2)
cap gen S_95=1 if gini_S==r(r3)
local temp1=r(r1)
local S5th = round(`temp1',.01)
local temp1=r(r2)
local S50th = round(`temp1',.01)
local temp1=r(r3)
local S95th= round(`temp1'*100,1)/100

cap drop SJ_*
cap drop N_total
ren S_emp Semp
cap drop S_*
cap drop xtiles
ren Semp S_emp
bys msa ind4 (xvar):  gen N_total=_N-1

 _pctile gini_S if tag_S, p(10,25, 50, 75,90)
local temp10=r(r1)
local S10th = round(`temp10'*100,1)/100
local temp25=r(r2)
local S25th = round(`temp25',.01)
local temp50=r(r3)
local S50th = round(`temp50',.01)
local temp75=r(r4)
local S75th= round(`temp75',.01)
local temp90=r(r5)
local S90th = round(`temp90'*100,1)/100
xtile xtiles=gini_S if tag_S, nq(20) 
replace xtiles =xtiles *5

foreach pt of numlist 10 25 50 75 90{
tempvar tmp1 tmp2
egen `tmp1' = max(gini_S) if xtiles==`pt' 
gen `tmp2'=1 if gini_S==`tmp1'
bys msa (xvar): egen S_`pt'=max(`tmp2')
} 

sort msa  xvar

tempfile temp4
save `temp4'


twoway  (function x, lcolor(black)) ///
(line yvar xvar if S_10==1 ,  lpattern(solid)  lcolor(purple) lwidth(.5))  ///
(line yvar xvar if S_25==1 ,  lpattern(shortdash) lcolor(green)   lwidth(.8) ) ///
(line  yvar xvar if S_50==1,  lpattern(solid) lcolor(navy) lwidth(1)  ) ///
(line  yvar xvar if S_75==1,   lpattern(dash_dot) lcolor(red) lwidth(.8)    ) ///
(line  yvar xvar if S_90==1,  lpattern(dash) lcolor(orange) lwidth(.8)     ) ///
, yscale(range(0,1) alt) ylab(,nogrid) graphregion(color(white)) ///
bgcolor(white)  xtitle("Cumulative Plant Share, MSA") ytitle("Cumulative Employment Share, MSA") ///
ylab(,nogrid)  ///
legend( order( 2 "10{superscript:th} Percentile, Gini = 0`S10th'" 3 "25{superscript:th} Percentile, Gini = 0`S25th'" 4  "Median, Gini = 0`S50th'" 5  "75{superscript:th} Percentile, Gini = 0`S75th'" 6  "90{superscript:th} Percentile, Gini = 0`S90th'")  position(10) ring(0) cols(1) region(lwidth(none) lcolor(none) color(none)) symxsize(*.5) )  
graph export "$CBP_output/Fig_A2_xi_lorenz.png", replace width(1600)





drop xvar_* yvar_*
drop if expandedvar==1
drop expandedvar
foreach ind in `indlist'{
	foreach group of numlist 1(1)10{
		quietly glcurve emp_per_estn if groupcbsa==`group' & ind4==`ind', by(msa) split pvar(xvar_`group') glvar(yvar) lorenz nograph 
		}
	quietly egen xvarind_`ind'=rowtotal(xvar_*)
	quietly egen yvarind_`ind'=rowtotal(yvar_*)
	quietly drop xvar_* yvar_*
	}

egen xvar_SJ=rowtotal(xvarind*)
egen yvar_SJ=rowtotal(yvarind*)

tempvar temp1
gen `temp1'=tag_SJ+1
expand `temp1', gen(expandedvar)

replace tag_SJ=0 if expandedvar==1
replace xvar_SJ=0 if expandedvar==1
replace yvar_SJ=0 if expandedvar==1
sort msa ind4 xvar_SJ


tempfile temp5
save `temp5'


cap drop SJ_*
cap drop N_total
bys msa ind4 (xvar_SJ):  gen N_total=_N-1
count if tag_SJ==1
count if N_total<10 & tag_SJ==1
_pctile gini_SJ if tag_SJ & N_total>=10, p(10,25, 50, 75,90)
cap gen SJ_10=1 if gini_SJ==r(r1)
cap gen SJ_25=1 if gini_SJ==r(r2)
cap gen SJ_50=1 if gini_SJ==r(r3)
cap gen SJ_75=1 if gini_SJ==r(r4)
cap gen SJ_90=1 if gini_SJ==r(r5)
local temp1=r(r1)
local SJ10th = round(`temp1',.01)
local temp1=r(r2)
local SJ25th = round(`temp1',.01)
local temp1=r(r3)
local SJ50th = round(`temp1',.01)
local temp1=r(r4)
local SJ75th= round(`temp1'*10,.1)/10
local temp1=r(r5)
local SJ90th = round(`temp1',.01)

duplicates drop yvar_SJ xvar_SJ SJ_10 SJ_25 SJ_50 SJ_75 SJ_90, force

twoway  (function x, lcolor(black)) ///
(line yvar_SJ xvar_SJ if SJ_10==1  ,  lpattern(solid)  lcolor(purple) lwidth(.5))  ///
(line yvar_SJ xvar_SJ if SJ_25==1 ,  lpattern(shortdash) lcolor(green)   lwidth(.8) ) ///
(line  yvar_SJ xvar_SJ if SJ_50==1 ,   lpattern(solid) lcolor(navy) lwidth(1)  ) ///
(line  yvar_SJ xvar_SJ if SJ_75==1 ,   lpattern(dash_dot) lcolor(red) lwidth(.8)    ) ///
(line  yvar_SJ xvar_SJ if SJ_90==1,  lpattern(dash) lcolor(orange) lwidth(.8)     ) ///
, yscale(range(0,1) alt) ylab(,nogrid) graphregion(color(white)) ///
bgcolor(white)  xtitle("Cumulative Plant Share, MSA-Industry") ytitle("Cumulative Employment Share, MSA-Industry") ///
ylab(,nogrid)  ///
legend( order( 2 "10{superscript:th} Percentile, Gini = 0`SJ10th'" 3 "25{superscript:th} Percentile, Gini = 0`SJ25th'" 4  "Median, Gini = 0`SJ50th'" 5  "75{superscript:th} Percentile, Gini = 0.71" 6  "90{superscript:th} Percentile, Gini = 0`SJ90th'")  position(10) ring(0) cols(1) region(lwidth(none) lcolor(none) color(none)) symxsize(*.5) )  
graph export "$CBP_output/Fig_A2_tau_lorenz.png", replace width(1600)




